Vamos a cargar el archivo de datos
# Path of the salaries CSV, relative to the working directory.
# The original literal ended with a stray space, which only resolves on
# file systems that silently strip trailing blanks from file names.
data_url <- "ds_salaries.csv"
# Load the raw data set into a data frame.
salaries <- read.csv(data_url)
if (!require("dplyr")) install.packages("dplyr"); library(dplyr)
## Loading required package: dplyr
## Warning: package 'dplyr' was built under R version 4.2.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Replace the two-letter experience codes with readable labels.
salaries <- mutate(
  salaries,
  experience_level = recode(
    experience_level,
    EN = "Entry-Level",
    MI = "Mid-Level",
    SE = "Senior",
    EX = "Executive"
  )
)
library(countrycode)
## Warning: package 'countrycode' was built under R version 4.2.3
# Convert employee_residence from two-letter (iso2c) to three-letter (iso3c)
# country codes, the form expected by the 'ISO-3' location mode of the
# choropleth drawn later. warn = FALSE turns off countrycode's warnings.
salaries <- mutate(
  salaries,
  employee_residence = countrycode(
    employee_residence,
    origin = "iso2c",
    destination = "iso3c",
    warn = FALSE
  )
)
library(dplyr)
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.2.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Mean salary in USD per country of residence (missing salaries ignored).
# "ISR" is removed afterwards because its value is an outlier.
salario_medio_por_pais <- salaries %>%
  group_by(employee_residence) %>%
  summarize(Salario_Medio = mean(salary_in_usd, na.rm = TRUE)) %>%
  filter(employee_residence != "ISR")
# World choropleth of the mean salary per country of residence.
#
# The summary table is handed to plot_ly() so the formulas can name its
# columns directly instead of reaching into it with `$`.
# The colour-bar settings are trace attributes: layout() has no 'colorbar'
# attribute, so the ticks given there were discarded with a warning.
# The "Salary" caption, previously passed as a legend title, is set as the
# colour-bar title (presumably what was intended, since the colour bar is
# the scale shown for this map).
fig <- plot_ly(
  data = salario_medio_por_pais,
  type = "choropleth",
  locationmode = "ISO-3",
  locations = ~employee_residence,
  z = ~Salario_Medio,
  colorbar = list(
    title = list(text = "Salary"),
    tickmode = "array",
    tickvals = c(0, 170000),
    ticktext = c("0", "170k")
  )
) %>%
  layout(
    title = "Salary per Country",
    geo = list(
      scope = "world",
      showframe = FALSE,
      projection = list(type = "mercator")
    )
  )
fig
# Count the records per company location, most frequent first,
# and print the five largest counts.
employees_by_country <- sort(
  table(salaries$company_location),
  decreasing = TRUE
)
head(employees_by_country, n = 5)
##
## US GB CA ES IN
## 3040 172 87 77 58
Debido a la gran diferencia en la cantidad de empleos entre países, no tendría sentido comparar los datos entre ellos: la falta de muestras en la mayoría de los países impediría una comparación equilibrada. Por ello, vamos a trabajar únicamente con los datos de US.
library(dplyr)
library(plotly)
# Keep only the rows whose company is located in the US
# (rows with a missing location are excluded as well).
nuevo_dataset_us <- salaries[salaries$company_location %in% "US", , drop = FALSE]
# Mean USD salary for every experience level / company size combination.
media_salarios <- summarize(
  group_by(nuevo_dataset_us, experience_level, company_size),
  Media_Salario = mean(salary_in_usd)
)
## `summarise()` has grouped output by 'experience_level'. You can override using
## the `.groups` argument.
# Grouped bar chart: mean US salary per experience level, one bar colour
# per company size.
#
# The x axis is categorical; without an explicit order the levels appear in
# the order they occur in the summary table, i.e. alphabetically
# (Entry-Level, Executive, Mid-Level, Senior). categoryarray puts them in
# seniority order instead, using the labels assigned when experience_level
# was recoded earlier in the script.
fig <- plot_ly(
  data = media_salarios,
  type = "bar",
  x = ~experience_level,
  y = ~Media_Salario,
  color = ~company_size
) %>%
  layout(
    title = "Salary per Experience Level and Company Size",
    xaxis = list(
      title = "Experience",
      categoryorder = "array",
      categoryarray = c("Entry-Level", "Mid-Level", "Senior", "Executive")
    ),
    yaxis = list(title = "Salary (USD)")
  )
fig
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.2.3
library(dplyr)
library(plotly)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Mean US salary in USD for every (work year, job title) pair.
media_salarios_por_title <- summarize(
  group_by(nuevo_dataset_us, work_year, job_title),
  Media_Salario = mean(salary_in_usd)
)
## `summarise()` has grouped output by 'work_year'. You can override using the
## `.groups` argument.
# Spread the table to one column per job title, then melt it back to long
# format keyed by work_year.
# NOTE(review): presumably done so that every year/title combination ends up
# with a row in the long table — confirm before simplifying.
media_salarios_por_title <- media_salarios_por_title %>%
  pivot_wider(names_from = job_title, values_from = Media_Salario) %>%
  melt(
    id.vars = "work_year",
    variable.name = "job_title",
    value.name = "Media_Salario"
  )
# Line chart of the mean US salary per job title across the years,
# one line (with markers) per job title.
#
# The figure size is given to plot_ly(): specifying width/height in layout()
# is deprecated and raised a warning.
# NOTE(review): there are more job titles than the default "Set2" palette has
# colours (8), so drawing the figure still emits an RColorBrewer warning;
# pass `colors` explicitly if that needs to be silenced.
fig <- plot_ly(
  data = media_salarios_por_title,
  x = ~work_year,
  y = ~Media_Salario,
  color = ~job_title,
  type = "scatter",
  mode = "lines+markers",
  width = 800,
  height = 600
) %>%
  layout(
    title = "Salary Evolution per Job Title",
    xaxis = list(title = "Year"),
    yaxis = list(title = "Salary (USD)")
  )
fig
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors